**************************************************
* CLEAN USE‑OF‑FORCE DATA 
**************************************************

* Define tempfiles for intermediate datasets
tempfile base passact singlemult

*-------------------------------------------------
* A. Base use‑of‑force indicators
*-------------------------------------------------
use "force.dta", clear

* 1. Extract year and drop missing incidents
gen year = real(substr(dt, 1, 4))
drop if missing(aid)

* 2. Overall use‑of‑force count
gen uof_tot = 1

* 3. Civilian race indicators
gen uof_civ_rac_blk = (civilian_race == "BLACK")
gen uof_civ_rac_wht = (civilian_race == "WHITE")
gen uof_civ_rac_hsp = inlist(civilian_race, "WHITE HISPANIC", "BLACK HISPANIC")
gen uof_civ_rac_asn = strpos(civilian_race, "ASIAN") > 0
gen uof_civ_rac_oth = inlist(civilian_race, ///
    "AMER IND/ALASKAN NATIVE", "AMER INDIAN / ALASKAN NATIVE", "UNKNOWN")

* 4. Civilian gender indicators
gen uof_civ_gen_mal = (civilian_gender == "MALE")
gen uof_civ_gen_fem = (civilian_gender == "FEMALE")

* 5. Race × gender combinations
foreach r in blk wht hsp asn oth {
    foreach g in mal fem {
        gen uof_`r'_`g' = uof_civ_rac_`r' & uof_civ_gen_`g'
    }
}

* 6. Injury status indicators
gen uof_inj_yin = inlist(civilian_alleged_injury, "True", "Yes")
gen uof_inj_nin = !uof_inj_yin

* 7. Race × gender × injury combinations
foreach r in blk wht hsp asn oth {
    foreach g in mal fem {
        foreach i in yin nin {
            gen uof_`r'_`g'_`i' = uof_civ_rac_`r' & uof_civ_gen_`g' & uof_inj_`i'
        }
        gen uof_`r'_`g'_all = uof_civ_rac_`r' & uof_civ_gen_`g'
    }
}

* 8. Race × injury totals
foreach r in blk wht hsp asn oth {
    foreach i in yin nin {
        gen uof_`r'_maf_`i' = uof_civ_rac_`r' & uof_inj_`i'
    }
    gen uof_`r'_maf_all = uof_civ_rac_`r'
}

* 9. Collapse to one row per incident
keep aid uof_*
duplicates drop aid
collapse (sum) uof_*, by(aid)

compress
save `base', replace


*-------------------------------------------------
* B. Passive vs. active resistance
*-------------------------------------------------
use "force.dta", clear
drop if missing(aid)
keep if person == "Member Action"

* Tag passive vs. active resister
gen passive_active = ""
replace passive_active = "passive" if resistance_type == "Passive Resister"
replace passive_active = "active"  if inlist(resistance_type, ///
    "Active Resister", "Assailant Assault/Battery", "Assailant Deadly Force")

* Create race × resistance flags
gen uof_blk_maf_active   = (civilian_race == "BLACK" & passive_active == "active")
gen uof_wht_maf_active   = (civilian_race == "WHITE" & passive_active == "active")
gen uof_hsp_maf_active   = inlist(civilian_race, "WHITE HISPANIC", "BLACK HISPANIC") & passive_active == "active"
gen uof_asn_maf_active   = (civilian_race == "ASIAN/PACIFIC ISLANDER" & passive_active == "active")
gen uof_oth_maf_active   = inlist(civilian_race, "AMER IND/ALASKAN NATIVE", "UNKNOWN") & passive_active == "active"

gen uof_blk_maf_passive  = (civilian_race == "BLACK" & passive_active == "passive")
gen uof_wht_maf_passive  = (civilian_race == "WHITE" & passive_active == "passive")
gen uof_hsp_maf_passive  = inlist(civilian_race, "WHITE HISPANIC", "BLACK HISPANIC") & passive_active == "passive"
gen uof_asn_maf_passive  = (civilian_race == "ASIAN/PACIFIC ISLANDER" & passive_active == "passive")
gen uof_oth_maf_passive  = inlist(civilian_race, "AMER IND/ALASKAN NATIVE", "UNKNOWN") & passive_active == "passive"

* Collapse to one row per incident
keep aid uof_*
collapse (sum) uof_*, by(aid)

compress
save `passact', replace


*-------------------------------------------------
* C. Single vs. multiple officers per incident
*-------------------------------------------------
use "force.dta", clear
drop if missing(aid)
keep if person == "Member Action"
keep event uid_force dt aid uid action

* Count officers per event
bys event: gen numofficers = _N
keep event aid numofficers civilian_race
duplicates drop

* Tag single vs. multiple
gen single_multiple = cond(numofficers == 1, "sing", "mult")

* Create race × officer-count flags
gen uof_blk_maf_single   = (civilian_race == "BLACK" & single_multiple == "sing")
gen uof_wht_maf_single   = (civilian_race == "WHITE" & single_multiple == "sing")
gen uof_hsp_maf_single   = inlist(civilian_race, "WHITE HISPANIC", "BLACK HISPANIC") & single_multiple == "sing"
gen uof_asn_maf_single   = (civilian_race == "ASIAN/PACIFIC ISLANDER" & single_multiple == "sing")
gen uof_oth_maf_single   = inlist(civilian_race, "AMER IND/ALASKAN NATIVE", "UNKNOWN") & single_multiple == "sing"

gen uof_blk_maf_multiple = (civilian_race == "BLACK" & single_multiple == "mult")
gen uof_wht_maf_multiple = (civilian_race == "WHITE" & single_multiple == "mult")
gen uof_hsp_maf_multiple = inlist(civilian_race, "WHITE HISPANIC", "BLACK HISPANIC") & single_multiple == "mult"
gen uof_asn_maf_multiple = (civilian_race == "ASIAN/PACIFIC ISLANDER" & single_multiple == "mult")
gen uof_oth_maf_multiple = inlist(civilian_race, "AMER IND/ALASKAN NATIVE", "UNKNOWN") & single_multiple == "mult"

* Collapse to one row per incident
keep aid uof_*
collapse (sum) uof_*, by(aid)

compress
save `singlemult', replace


*-------------------------------------------------
* D. Merge all metrics into final dataset
*-------------------------------------------------
use `base', clear

merge 1:1 aid using `passact'
drop if _merge == 2
drop _merge
foreach v of varlist *_active *_passive {
    replace `v' = 0 if missing(`v')
}

merge 1:1 aid using `singlemult'
drop if _merge == 2
drop _merge
foreach v of varlist *_single *_multiple {
    replace `v' = 0 if missing(`v')
}

compress
save "uof_cleaned.dta", replace



**************************************************
* CLEAN ARREST DATA 
**************************************************

* Define tempfiles for intermediate storage
tempfile base weapon

*-------------------------------------------------
* A. Base arrest indicators
*-------------------------------------------------
clear
use "arrest.dta", clear

* 1. Extract year and drop records without AID
gen year = real(substr(dt, 1, 4))
drop if missing(aid)

* 2. Overall arrest count
gen arr_tot = 1

* 3. Officer role flags
gen arr_role_fir = (role == "FIRST ARRESTING OFFICER")
gen arr_role_sec = (role == "SECOND ARRESTING OFFICER")
gen arr_role_ass = (role == "ASSISTING ARRESTING OFFICER")

* 4. Civilian race flags
gen arr_civ_rac_blk = (civilian_race == "BLACK")
gen arr_civ_rac_wht = (civilian_race == "WHITE")
gen arr_civ_rac_hsp = inlist(civilian_race, "WHITE HISPANIC", "BLACK HISPANIC")
gen arr_civ_rac_asn = (civilian_race == "ASIAN/PACIFIC ISLANDER")
gen arr_civ_rac_oth = inlist(civilian_race, "AMER IND/ALASKAN NATIVE", "UNKNOWN")

* 5. Civilian gender flags
gen arr_civ_gen_mal = (civilian_gender == "M")
gen arr_civ_gen_fem = (civilian_gender == "F")

* 6. Charge type flags
gen arr_type_fel = (charge_type == "F")
gen arr_type_mis = (charge_type == "M")

* 7. Race × gender × role × charge combinations
foreach r in blk wht hsp asn oth {
    foreach g in mal fem {
        * race & gender
        gen arr_`r'_`g'_all     = arr_civ_rac_`r' & arr_civ_gen_`g'
        gen arr_`r'_`g'_fel_all = arr_`r'_`g'_all & arr_type_fel
        gen arr_`r'_`g'_mis_all = arr_`r'_`g'_all & arr_type_mis

        * first arresting officer
        gen arr_`r'_`g'_fir      = arr_`r'_`g'_all & arr_role_fir
        gen arr_`r'_`g'_fel_fir = arr_`r'_`g'_fir & arr_type_fel
        gen arr_`r'_`g'_mis_fir = arr_`r'_`g'_fir & arr_type_mis

        * second arresting officer
        gen arr_`r'_`g'_sec      = arr_`r'_`g'_all & arr_role_sec
        gen arr_`r'_`g'_fel_sec = arr_`r'_`g'_sec & arr_type_fel
        gen arr_`r'_`g'_mis_sec = arr_`r'_`g'_sec & arr_type_mis

        * assisting arresting officer
        gen arr_`r'_`g'_ass      = arr_`r'_`g'_all & arr_role_ass
        gen arr_`r'_`g'_fel_ass = arr_`r'_`g'_ass & arr_type_fel
        gen arr_`r'_`g'_mis_ass = arr_`r'_`g'_ass & arr_type_mis
    }
}

* 8. Race × charge × role totals (all genders)
foreach r in blk wht hsp asn oth {
    gen arr_`r'_maf_all     = arr_civ_rac_`r'
    gen arr_`r'_maf_fel_all = arr_`r'_maf_all & arr_type_fel
    gen arr_`r'_maf_mis_all = arr_`r'_maf_all & arr_type_mis

    gen arr_`r'_maf_fir     = arr_`r'_maf_all & arr_role_fir
    gen arr_`r'_maf_fel_fir = arr_`r'_maf_fir & arr_type_fel
    gen arr_`r'_maf_mis_fir = arr_`r'_maf_fir & arr_type_mis

    gen arr_`r'_maf_sec     = arr_`r'_maf_all & arr_role_sec
    gen arr_`r'_maf_fel_sec = arr_`r'_maf_sec & arr_type_fel
    gen arr_`r'_maf_mis_sec = arr_`r'_maf_sec & arr_type_mis

    gen arr_`r'_maf_ass     = arr_`r'_maf_all & arr_role_ass
    gen arr_`r'_maf_fel_ass = arr_`r'_maf_ass & arr_type_fel
    gen arr_`r'_maf_mis_ass = arr_`r'_maf_ass & arr_type_mis
}

* 9. Collapse to one row per incident and save base
keep aid arr_*
duplicates drop aid
collapse (sum) arr_*, by(aid)
compress
save `base', replace

*-------------------------------------------------
* B. Weapon-related arrest indicators
*-------------------------------------------------
preserve
use "arrest.dta", clear

* 1. Identify weapon-related charges
gen weapon = strpos(statute, "FIREARM") | strpos(statute, "WEAPON") | strpos(statute, "GUN")
drop if weapon == 0 | missing(aid)

* 2. Keep only first or second arresting officer
keep if inlist(role, "FIRST ARRESTING OFFICER", "SECOND ARRESTING OFFICER")

* 3. Race-specific weapon flags by role
gen arr_blk_maf_weapon_fir = (civilian_race == "BLACK" & role == "FIRST ARRESTING OFFICER")
gen arr_wht_maf_weapon_fir = (civilian_race == "WHITE" & role == "FIRST ARRESTING OFFICER")
gen arr_hsp_maf_weapon_fir = inlist(civilian_race, "WHITE HISPANIC", "BLACK HISPANIC") & role == "FIRST ARRESTING OFFICER"
gen arr_asn_maf_weapon_fir = (civilian_race == "ASIAN/PACIFIC ISLANDER" & role == "FIRST ARRESTING OFFICER")
gen arr_oth_maf_weapon_fir = inlist(civilian_race, "AMER IND/ALASKAN NATIVE", "UNKNOWN") & role == "FIRST ARRESTING OFFICER"

gen arr_blk_maf_weapon_sec = (civilian_race == "BLACK" & role == "SECOND ARRESTING OFFICER")
gen arr_wht_maf_weapon_sec = (civilian_race == "WHITE" & role == "SECOND ARRESTING OFFICER")
gen arr_hsp_maf_weapon_sec = inlist(civilian_race, "WHITE HISPANIC", "BLACK HISPANIC") & role == "SECOND ARRESTING OFFICER"
gen arr_asn_maf_weapon_sec = (civilian_race == "ASIAN/PACIFIC ISLANDER" & role == "SECOND ARRESTING OFFICER")
gen arr_oth_maf_weapon_sec = inlist(civilian_race, "AMER IND/ALASKAN NATIVE", "UNKNOWN") & role == "SECOND ARRESTING OFFICER"

* 4. Collapse and save weapon indicators
keep aid arr_*_weapon_*
duplicates drop aid
collapse (sum) arr_*_weapon_*, by(aid)
compress
save `weapon', replace
restore

*-------------------------------------------------
* C. Merge base + weapon into final dataset
*-------------------------------------------------
use `base', clear

merge 1:1 aid using `weapon'
drop if _merge == 2
drop _merge

* Replace missing weapon flags with zero
foreach var of varlist *_weapon_* {
    replace `var' = 0 if missing(`var')
}

compress
save "arrests_cleaned.dta", replace



**************************************************
* CLEAN STOP DATA
**************************************************

clear
use "stop.dta", clear

* 1. Extract year and drop records with missing assignment ID
gen year = real(substr(dt, 1, 4))
drop if missing(aid)

* 2. Overall stop indicator
gen stp_tot = 1

* 3. Civilian race indicators
gen stp_civ_rac_blk = (civilian_race == "BLACK")
gen stp_civ_rac_wht = (civilian_race == "WHITE")
gen stp_civ_rac_hsp = inlist(civilian_race, "WHITE HISPANIC", "BLACK HISPANIC")
gen stp_civ_rac_asn = (civilian_race == "ASIAN/PACIFIC ISLANDER")
gen stp_civ_rac_oth = inlist(civilian_race, ///
    "AMER IND/ALASKAN NATIVE", "NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER", "UNKNOWN")

* 4. Civilian gender indicators
gen stp_civ_gen_mal = (civilian_gender == "M")
gen stp_civ_gen_fem = (civilian_gender == "F")

* 5. Race × gender combinations
foreach r in blk wht hsp asn oth {
    foreach g in mal fem {
        gen stp_civ_`g'_`r' = stp_civ_gen_`g' & stp_civ_rac_`r'
    }
}

* 6. Role indicators
gen stp_role_fir = (role == "first")
gen stp_role_sec = (role == "second")
gen stp_role_sup = (role == "supervisor")

* 7. Stop reason indicators
gen stp_type_trf = (vehicle_involved == "true")
gen stp_type_nar = (suspect_narcotic_activity == "true")
gen stp_type_fit = (fits_description_offender == "true")
gen stp_type_vio = (violent_crime == "true")
gen stp_type_sus = (other_reasonable_suspicion == "true")
gen stp_type_oth = (other_factor == "true")

* 8. Race × gender × role flags
foreach r in blk wht hsp asn oth {
    foreach g in mal fem {
        gen stp_`r'_`g'_all = stp_civ_`g'_`r'
        gen stp_`r'_`g'_fir = stp_`r'_`g'_all & stp_role_fir
        gen stp_`r'_`g'_sec = stp_`r'_`g'_all & stp_role_sec
        gen stp_`r'_`g'_sup = stp_`r'_`g'_all & stp_role_sup
    }
}

* 9. Race × role totals (all genders)
foreach r in blk wht hsp asn oth {
    gen stp_`r'_maf_all = stp_civ_rac_`r'
    gen stp_`r'_maf_fir = stp_`r'_maf_all & stp_role_fir
    gen stp_`r'_maf_sec = stp_`r'_maf_all & stp_role_sec
    gen stp_`r'_maf_sup = stp_`r'_maf_all & stp_role_sup
}

* 10. Collapse to one record per incident
keep aid stp_*
collapse (sum) stp_*, by(aid)

compress
save "stops_cleaned.dta", replace




**************************************************
* MERGE ASSIGNMENT DATA WITH OUCTOME DATA
**************************************************

clear
use "assignment-with-L2-addresses.dta", clear

* 1. Merge use-of-force, arrests, and stops by incident (aid)
merge 1:m aid using "uof_cleaned.dta"
drop if _merge == 2
drop _merge

merge 1:m aid using "arrests_cleaned.dta"
drop if _merge == 2
drop _merge

merge 1:m aid using "stops_cleaned.dta"
drop if _merge == 2
drop _merge

* 2. Fill zeros for missing counts on assignment days where data exist (these are days when the officer worked but took no action)
foreach var of varlist uof_*  arr_* stp_* {
    replace `var' = 0 if missing(`var') 

}

* 3. Drop officers with multiple records on the same day
bys uid date: gen _nrec = _N
drop if _nrec > 1
drop _nrec

* 4. Rename assignment and officer variables
rename rank        assgn_rank
rename unit        assgn_unit
rename beat        assgn_beat
rename watch       assgn_shift
rename vehicle     assgn_vehicle
rename last_name   off_last_name
rename first_name  off_first_name
rename middle_initial off_middle
rename yob         off_yob
rename race        off_race
rename gender      off_gender
rename present_for_duty assgn_present
rename absence_code    assgn_absence_code
rename absence_description assgn_absence_desc

* 5. Parse appointment date
gen off_apt_date = date(appointed, "YMD")
format off_apt_date %td

* 6. Recode officer demographics to lowercase categories
replace off_race = "aapi"   if off_race == "ASIAN/PACIFIC ISLANDER"
replace off_race = "black"  if off_race == "BLACK"
replace off_race = "hisp"   if inlist(off_race, "WHITE HISPANIC", "BLACK HISPANIC", "HISPANIC")
replace off_race = "native" if off_race == "AMER IND/ALASKAN NATIVE"
replace off_race = "white"  if off_race == "WHITE"
drop if off_race == "UNKNOWN"

replace off_gender = "female" if off_gender == "F"
replace off_gender = "male"   if off_gender == "M"

* 7. Clean up address/match variables
rename score_match    add_score_match
rename fulladdress_L2 add_L2address

* 8. Drop unnecessary vars
drop lastname firstname middle_init fulladdress_L2 appointed dt_start dt_end modified_by_last modified_by_first modified_date

* 9. Rename prefixed L2 and assignment vars
foreach v in lastname_L2 firstname_L2 middle_init_L2 {
    rename `v' off_`v'
}
foreach v in date dt_start dt_end {
    rename `v' assgn_`v'
}

* 10. Split assignment datetime into start/end times
foreach t in start end {
    split assgn_dt_`t', parse("T")
    gen assgn_`t'_hour   = real(substr(assgn_dt_`t'2, 1, 2))
    gen assgn_`t'_minute = real(substr(assgn_dt_`t'2, 4, 2))
    gen assgn_`t'_time   = assgn_`t'_hour + assgn_`t'_minute/60
    replace assgn_`t'_time = . if assgn_present == "false"
    drop assgn_dt_`t'1 assgn_dt_`t'2 assgn_`t'_hour assgn_`t'_minute
}

* 11. Compute assignment duration and weekday
gen assgn_duration = assgn_end_time - assgn_start_time
replace assgn_duration = assgn_duration + 24 if assgn_end_time < assgn_start_time
replace assgn_duration = . if assgn_present == "false"

gen _dow = dow(assgn_date_num)
label define wd 0 "Sun" 1 "Mon" 2 "Tue" 3 "Wed" 4 "Thu" 5 "Fri" 6 "Sat"
gen assgn_weekday = string(_dow), format(%9.0g)
label values assgn_weekday wd
drop _dow

* 12. First and last day present per officer
sort uid assgn_date_num
by uid: egen assgn_first_date_present = min(cond(assgn_present=="true", assgn_date_num, .))
by uid: egen assgn_last_date_present  = max(cond(assgn_present=="true", assgn_date_num, .))
format assgn_first_date_present assgn_last_date_present %td

* 13. Reorder and label key variables
order uid suid aid off_* add_* assgn_* uof_* arr_* stp_*
label var suid              "Unique ID in L2 data"
label var has_address       "Matched to L2 address"
label var add_score_match   "Reclink match score"
label var add_L2address     "L2 address string"
label var add_block         "Census block"
label var add_lat           "Latitude"
label var add_lon           "Longitude"
label var add_pol_district  "Police district"
label var add_pol_beat      "Police beat"
label var add_ca_num        "Community area number"
label var add_ca_name       "Community area name"

compress
save "masterfile.dta", replace
